Simple binary classification problem utilizing convolutional neural networks

Import libraries


In [1]:
# Import libraries. 

from __future__ import print_function
import os
import sys
# os.environ['THEANO_FLAGS']='mode=FAST_RUN,device=gpu0,floatX=float32,optimizer=fast_compile'
# os.environ['KERAS_BACKEND'] = 'theano'
"""
os.environ['THEANO_FLAGS']='mode=FAST_RUN,device=gpu3,floatX=float32,optimizer=fast_compile'
os.environ['KERAS_BACKEND'] = 'theano'

In case you want to select a graphic card (i the above code i set the 3rd graphic card.) 
"""
# from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score,roc_auc_score
from sklearn.metrics import roc_curve, auc
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler
import numpy as np
import keras 
import keras.backend as K
from keras.callbacks import LearningRateScheduler
import math
from keras import callbacks
import glob
from PIL import Image
from sklearn.cross_validation import train_test_split
from keras.utils.visualize_util import plot


Using Theano backend.
Using gpu device 0: GeForce GTX 750 Ti (CNMeM is disabled, cuDNN 5105)
/usr/local/lib/python3.5/dist-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
  warnings.warn(warn)
/usr/local/lib/python3.5/dist-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

Basic functions


In [2]:
# It is good to know the pid of the running code in case you need to stop  or monitor. 
print (os.getpid())
file_open = lambda x,y: glob.glob(os.path.join(x,y))

# learning rate schedule. It is helpful when the learning rate can be dynamically set up. We will be using the callback functionality that keras provides. 
def step_decay(epoch):
  initial_lrate =0.01
  drop = 0.3
  epochs_drop = 30.0
  lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop))
  print (lrate)
  return lrate

# The following function will be used to give a number of the parameters in our model. Useful when we need to get an estimate of what size of dataset we have to use.  
def size(model): 
  return sum([np.prod(K.get_value(w).shape) for w in model.trainable_weights])

def createmodel(img_channels,img_rows,img_cols):
  # This is a Sequential model. Graph models can be used in order to create more complex networks. 
  # Teaching Points:
  # 1. Here we utilize the adam optimization algorithm. In order to use the SGD algorithm one could replace the {adam=keras.optimizers.Adadelta(lr=0)} line with  {sgd = SGD(lr=0.0, momentum=0.9, decay=0.0, nesterov=False)} make sure you import the correct optimizer from keras. 
  # 2. This is a binary classification problem so make sure that the correct activation loss function combination is used. For such a problem the sigmoid activation function with the binary cross entropy loss is a good option
  # 3. Since this is a binary problem use   model.add(Dense(1)) NOT 2...
  # 4. For multi class model this code can be easily modified by selecting the softmax as activation function and the categorical cross entropy as loss 

  model = Sequential()
  model.add(Convolution2D(16, 3, 3, border_mode='same',input_shape=(1, img_rows, img_cols)))
  model.add(Activation('relu'))
  model.add(Convolution2D(16, 5, 5, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Convolution2D(32, 3, 3, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Convolution2D(64, 5, 5, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Convolution2D(64, 3, 3, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Convolution2D(128, 3, 3, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Flatten())
  model.add(Dense(128, init='he_normal'))
  model.add(Activation('relu'))
  model.add(Dropout(0.5)) 
  model.add(Dense(32, init='he_normal'))
  model.add(Activation('relu'))
  model.add(Dropout(0.5)) 
  model.add(Dense(1))
  # model.add(Activation('relu'))
  model.add(Activation('sigmoid'))
  # learning schedule callback
  adam=keras.optimizers.Adadelta(lr=0)
  lrate = LearningRateScheduler(step_decay)
  model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
  plot(model, to_file='model.png')
  return model

def shuffle(X, y):
  perm = np.random.permutation(len(y))
  X = X[perm]
  y = y[perm]
  print (np.shape(X))
  return X, y


def read_data(image):
  "opens image and converts it to a m*n matrix" 
  image = Image.open(image)
  image = image.getdata()
  # image = list(image.getdata())
  # image = map(list,image)
  image = np.array(image)
  return image.reshape(-1)

def createTrainTestValset(image_dir1, image_dir2):
  Class1_images = file_open(image_dir1,"*.jpg")
  Class2_images = file_open(image_dir2,"*.jpg")
  Class1_set = []
  Class2_set = []
  # Read all the files, and create numpy arrays. 
  Class1_set = [read_data(image) for image in Class1_images]
  Class2_set = [read_data(image) for image in Class2_images]
  Class1_set = np.array(Class1_set) #This is where the Memory Error occurs
  Class2_set = np.array(Class2_set)
  X=np.vstack((Class1_set, Class2_set))
  X=X.astype(np.float)/255
  # print (np.shape(X))
  yclass1=np.zeros((np.shape(Class1_set)[0]))
  yclass2=np.ones((np.shape(Class2_set)[0]))
  # print (np.shape(yclass1))
  y=np.concatenate((yclass1, yclass2))
  # print (np.shape(y)) 
  X,y=shuffle(X, y)
  print (np.shape(X)) 
  print (np.max(X))
  print (np.shape(y)) 
  X_train, X_val,y_train, y_val= train_test_split(X,y, test_size=0.2, random_state=42)
  return X_train,y_train, X_val, y_val 

  # Read the images; and split them in three different sets. 
def trainandpredict(Scan=32 ,img_channels=1,batch_size=64,nb_epoch=5,data_augmentation=False):
  img_rows=Scan
  img_cols=Scan
  CurrentDir= os.getcwd()
  image_dir1=os.path.abspath(os.path.join(os.path.abspath(os.path.join(CurrentDir, os.pardir)), "Data","negative_images"))
#   print(image_dir1)
  image_dir2=os.path.abspath(os.path.join(os.path.abspath(os.path.join(CurrentDir, os.pardir)), "Data","positive_images"))
  modeleval=createmodel(img_channels,img_rows,img_cols)
  X_train,y_train, X_val, y_val = createTrainTestValset(image_dir1, image_dir2)
  X_train =X_train.reshape(
    -1,  # number of samples, -1 makes it so that this number is determined automatically
    1,   # 1 color channel, since images are only black and white
    Scan,  # first image dimension (vertical)
    Scan,  # second image dimension (horizontal)
  )
  X_val =X_val.reshape(
    -1,  # number of samples, -1 makes it so that this number is determined automatically
    1,   # 1 color channel, since images are only black and white
    Scan,  # first image dimension (vertical)
    Scan,  # second image dimension (horizontal)
  )
  # Callbacks
  best_model = ModelCheckpoint('Final.h5', verbose=1, monitor='val_loss',save_best_only=True)
  lrate = LearningRateScheduler(step_decay)

  # Data augmentation is always a good choice
  if not data_augmentation:
    print('Not using data augmentation.')
    modeleval.load_weights('Final.h5')
    modeleval.fit(X_train, y_train,batch_size=batch_size,nb_epoch=nb_epoch,validation_split=0.1,callbacks=[best_model,lrate],shuffle=True)
  else:
    print('Using real-time data augmentation.')
    print ("pending")
    sys.exit()
    # this will do preprocessing and realtime data augmentation
    # datagen = ImageDataGenerator(
    #   featurewise_center=False,  # set input mean to 0 over the dataset
    #   samplewise_center=False,  # set each sample mean to 0
    #   featurewise_std_normalization=False,  # divide inputs by std of the dataset
    #   samplewise_std_normalization=False,  # divide each input by its std
    #   zca_whitening=False,  # apply ZCA whitening
    #   rotation_range=3,  # randomly rotate images in the range (degrees, 0 to 180)
    #   width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
    #   height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
    #   horizontal_flip=True,  # randomly flip images
    #   vertical_flip=False)  # randomly flip images
    # modeleval.fit(X_train, y_train,batch_size=batch_size,nb_epoch=nb_epoch,validation_data=(X_train1, y_train1),callbacks=[best_model,lrate],shuffle=True)


  # Some evaluation Just the basic stuff... 
  print (dir(modeleval))
  Y_cv_pred = modeleval.predict(X_val, batch_size = 32)
  roc =roc_auc_score(y_val, Y_cv_pred)
  print("ROC:", roc)
  print (Y_cv_pred)
  Y_cv_pred[Y_cv_pred>=.5]=1
  Y_cv_pred[Y_cv_pred<.5]=0
  target_names=[] 
  # print ("The f1-score gives you the harmonic mean of precision and recall. The scores corresponding to every class will tell you the accuracy of the classifier in classifying the data points in that particular class compared to all other classes.The support is the number of samples of the true response that lie in that class.")
  target_names = ['class 0', 'class 1']
  print(classification_report(y_val, Y_cv_pred, target_names=target_names,digits=4))

# It is good to know the pid of the running code in case you need to stop  or monitor. 
print (os.getpid())
file_open = lambda x,y: glob.glob(os.path.join(x,y))

# learning rate schedule. It is helpful when the learning rate can be dynamically set up. We will be using the callback functionality that keras provides. 
def step_decay(epoch):
  initial_lrate =0.01
  drop = 0.3
  epochs_drop = 30.0
  lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop))
  print (lrate)
  return lrate

# The following function will be used to give a number of the parameters in our model. Useful when we need to get an estimate of what size of dataset we have to use.  
def size(model): 
  return sum([np.prod(K.get_value(w).shape) for w in model.trainable_weights])

def createmodel(img_channels,img_rows,img_cols):
  # This is a Sequential model. Graph models can be used in order to create more complex networks. 
  # Teaching Points:
  # 1. Here we utilize the adam optimization algorithm. In order to use the SGD algorithm one could replace the {adam=keras.optimizers.Adadelta(lr=0)} line with  {sgd = SGD(lr=0.0, momentum=0.9, decay=0.0, nesterov=False)} make sure you import the correct optimizer from keras. 
  # 2. This is a binary classification problem so make sure that the correct activation loss function combination is used. For such a problem the sigmoid activation function with the binary cross entropy loss is a good option
  # 3. Since this is a binary problem use   model.add(Dense(1)) NOT 2...
  # 4. For multi class model this code can be easily modified by selecting the softmax as activation function and the categorical cross entropy as loss 

  model = Sequential()
  model.add(Convolution2D(16, 3, 3, border_mode='same',input_shape=(1, img_rows, img_cols)))
  model.add(Activation('relu'))
  model.add(Convolution2D(16, 5, 5, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Convolution2D(32, 3, 3, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Convolution2D(64, 5, 5, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Convolution2D(64, 3, 3, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Convolution2D(128, 3, 3, border_mode='same'))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Flatten())
  model.add(Dense(128, init='he_normal'))
  model.add(Activation('relu'))
  model.add(Dropout(0.5)) 
  model.add(Dense(32, init='he_normal'))
  model.add(Activation('relu'))
  model.add(Dropout(0.5)) 
  model.add(Dense(1))
  # model.add(Activation('relu'))
  model.add(Activation('sigmoid'))
  # learning schedule callback
  adam=keras.optimizers.Adadelta(lr=0)
  lrate = LearningRateScheduler(step_decay)
  model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
  plot(model, to_file='model.png')
  return model

def shuffle(X, y):
  perm = np.random.permutation(len(y))
  X = X[perm]
  y = y[perm]
  print (np.shape(X))
  return X, y


def read_data(image):
  "opens image and converts it to a m*n matrix" 
  image = Image.open(image)
  image = image.getdata()
  # image = list(image.getdata())
  # image = map(list,image)
  image = np.array(image)
  return image.reshape(-1)

def createTrainTestValset(image_dir1, image_dir2):
  Class1_images = file_open(image_dir1,"*.jpg")
  Class2_images = file_open(image_dir2,"*.jpg")
  Class1_set = []
  Class2_set = []
  # Read all the files, and create numpy arrays. 
  Class1_set = [read_data(image) for image in Class1_images]
  Class2_set = [read_data(image) for image in Class2_images]
  Class1_set = np.array(Class1_set) #This is where the Memory Error occurs
  Class2_set = np.array(Class2_set)
  X=np.vstack((Class1_set, Class2_set))
  X=X.astype(np.float)/255
  # print (np.shape(X))
  yclass1=np.zeros((np.shape(Class1_set)[0]))
  yclass2=np.ones((np.shape(Class2_set)[0]))
  # print (np.shape(yclass1))
  y=np.concatenate((yclass1, yclass2))
  # print (np.shape(y)) 
  X,y=shuffle(X, y)
  print (np.shape(X)) 
  print (np.max(X))
  print (np.shape(y)) 
  X_train, X_val,y_train, y_val= train_test_split(X,y, test_size=0.2, random_state=42)
  return X_train,y_train, X_val, y_val 

  # Read the images; and split them in three different sets. 
def trainandpredict(Scan=32 ,img_channels=1,batch_size=64,nb_epoch=5,data_augmentation=False):
  img_rows=Scan
  img_cols=Scan
  CurrentDir= os.getcwd()
  image_dir1=os.path.abspath(os.path.join(os.path.abspath(os.path.join(CurrentDir, os.pardir)), "Data","negative_images"))
#   print(image_dir1)
  image_dir2=os.path.abspath(os.path.join(os.path.abspath(os.path.join(CurrentDir, os.pardir)), "Data","positive_images"))
  modeleval=createmodel(img_channels,img_rows,img_cols)
  X_train,y_train, X_val, y_val = createTrainTestValset(image_dir1, image_dir2)
  X_train =X_train.reshape(
    -1,  # number of samples, -1 makes it so that this number is determined automatically
    1,   # 1 color channel, since images are only black and white
    Scan,  # first image dimension (vertical)
    Scan,  # second image dimension (horizontal)
  )
  X_val =X_val.reshape(
    -1,  # number of samples, -1 makes it so that this number is determined automatically
    1,   # 1 color channel, since images are only black and white
    Scan,  # first image dimension (vertical)
    Scan,  # second image dimension (horizontal)
  )
  # Callbacks
  best_model = ModelCheckpoint('Final.h5', verbose=1, monitor='val_loss',save_best_only=True)
  lrate = LearningRateScheduler(step_decay)

  # Data augmentation is always a good choice
  if not data_augmentation:
    print('Not using data augmentation.')
    modeleval.load_weights('Final.h5')
    modeleval.fit(X_train, y_train,batch_size=batch_size,nb_epoch=nb_epoch,validation_split=0.1,callbacks=[best_model,lrate],shuffle=True)
  else:
    print('Using real-time data augmentation.')
    print ("pending")
    sys.exit()
    # this will do preprocessing and realtime data augmentation
    # datagen = ImageDataGenerator(
    #   featurewise_center=False,  # set input mean to 0 over the dataset
    #   samplewise_center=False,  # set each sample mean to 0
    #   featurewise_std_normalization=False,  # divide inputs by std of the dataset
    #   samplewise_std_normalization=False,  # divide each input by its std
    #   zca_whitening=False,  # apply ZCA whitening
    #   rotation_range=3,  # randomly rotate images in the range (degrees, 0 to 180)
    #   width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
    #   height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
    #   horizontal_flip=True,  # randomly flip images
    #   vertical_flip=False)  # randomly flip images
    # modeleval.fit(X_train, y_train,batch_size=batch_size,nb_epoch=nb_epoch,validation_data=(X_train1, y_train1),callbacks=[best_model,lrate],shuffle=True)


  # Some evaluation Just the basic stuff... 
  print (dir(modeleval))
  Y_cv_pred = modeleval.predict(X_val, batch_size = 32)
  roc =roc_auc_score(y_val, Y_cv_pred)
  print("ROC:", roc)
  print (Y_cv_pred)
  Y_cv_pred[Y_cv_pred>=.5]=1
  Y_cv_pred[Y_cv_pred<.5]=0
  target_names=[] 
  # print ("The f1-score gives you the harmonic mean of precision and recall. The scores corresponding to every class will tell you the accuracy of the classifier in classifying the data points in that particular class compared to all other classes.The support is the number of samples of the true response that lie in that class.")
  target_names = ['class 0', 'class 1']
  print(classification_report(y_val, Y_cv_pred, target_names=target_names,digits=4))


6241
6241

Run Program


In [3]:
if __name__ == '__main__':
    trainandpredict()


(8710, 1024)
(8710, 1024)
1.0
(8710,)
Not using data augmentation.
Train on 6271 samples, validate on 697 samples
0.01
Epoch 1/5
6208/6271 [============================>.] - ETA: 0s - loss: 0.3776 - acc: 0.8225Epoch 00000: val_loss improved from inf to 0.32175, saving model to Final.h5
6271/6271 [==============================] - 25s - loss: 0.3773 - acc: 0.8222 - val_loss: 0.3218 - val_acc: 0.8809
0.01
Epoch 2/5
6208/6271 [============================>.] - ETA: 0s - loss: 0.3645 - acc: 0.8328Epoch 00001: val_loss improved from 0.32175 to 0.31808, saving model to Final.h5
6271/6271 [==============================] - 26s - loss: 0.3644 - acc: 0.8332 - val_loss: 0.3181 - val_acc: 0.8766
0.01
Epoch 3/5
6208/6271 [============================>.] - ETA: 0s - loss: 0.3629 - acc: 0.8307Epoch 00002: val_loss improved from 0.31808 to 0.31692, saving model to Final.h5
6271/6271 [==============================] - 26s - loss: 0.3635 - acc: 0.8302 - val_loss: 0.3169 - val_acc: 0.8809
0.01
Epoch 4/5
6208/6271 [============================>.] - ETA: 0s - loss: 0.3624 - acc: 0.8313Epoch 00003: val_loss improved from 0.31692 to 0.31579, saving model to Final.h5
6271/6271 [==============================] - 26s - loss: 0.3629 - acc: 0.8305 - val_loss: 0.3158 - val_acc: 0.8852
0.01
Epoch 5/5
6208/6271 [============================>.] - ETA: 0s - loss: 0.3578 - acc: 0.8384Epoch 00004: val_loss improved from 0.31579 to 0.31230, saving model to Final.h5
6271/6271 [==============================] - 26s - loss: 0.3574 - acc: 0.8385 - val_loss: 0.3123 - val_acc: 0.8824
['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_fit_loop', '_flattened_layers', '_gather_dict_attr', '_gather_list_attr', '_get_node_attribute_at_index', '_make_predict_function', '_make_test_function', '_make_train_function', '_output_mask_cache', '_output_shape_cache', '_output_tensor_cache', '_predict_loop', '_standardize_user_data', '_test_loop', '_updated_config', 'add', 'add_inbound_node', 'assert_input_compatibility', 'build', 'built', 'call', 'compile', 'compute_mask', 'constraints', 'container_nodes', 'count_params', 'create_input_layer', 'evaluate', 'evaluate_generator', 'fit', 'fit_generator', 'flattened_layers', 'from_config', 'get_config', 'get_input_at', 'get_input_mask_at', 'get_input_shape_at', 'get_layer', 'get_output_at', 'get_output_mask_at', 'get_output_shape_at', 'get_output_shape_for', 'get_weights', 'inbound_nodes', 'input', 'input_layers', 'input_layers_node_indices', 'input_layers_tensor_indices', 'input_mask', 'input_names', 'input_shape', 'input_spec', 'inputs', 'layers', 'load_weights', 'load_weights_from_hdf5_group', 'load_weights_from_hdf5_group_by_name', 'loss', 'loss_weights', 'metrics', 'metrics_names', 'metrics_tensors', 'model', 'name', 'nodes_by_depth', 'non_trainable_weights', 'optimizer', 'outbound_nodes', 'output', 'output_layers', 'output_layers_node_indices', 'output_layers_tensor_indices', 'output_mask', 'output_names', 'output_shape', 'outputs', 'pop', 'predict', 'predict_classes', 'predict_generator', 'predict_on_batch', 'predict_proba', 'regularizers', 'reset_states', 'run_internal_graph', 'sample_weight_mode', 'save', 'save_weights', 'save_weights_to_hdf5_group', 'set_input', 'set_weights', 'state_updates', 'stateful', 'stop_training', 'summary', 'supports_masking', 'test_on_batch', 'to_json', 'to_yaml', 'train_on_batch', 'trainable', 'trainable_weights', 'training_data', 'updates', 'uses_learning_phase', 'validation_data', 'weights']
ROC: 0.936208267091
[[ 0.37849411]
 [ 0.24506178]
 [ 0.98440951]
 ..., 
 [ 0.54057229]
 [ 0.51199996]
 [ 0.76022846]]
             precision    recall  f1-score   support

    class 0     0.7882    0.7761    0.7821       518
    class 1     0.9058    0.9118    0.9088      1224

avg / total     0.8709    0.8714    0.8711      1742


In [ ]: